---
title: "Effects of Host-Dependent Niches and Biotic Constraints on Climate Change Driven Range Shifts in Anemonefish"
author: "Christopher Rauch"
date: last-modified
format:
html:
toc: true
toc-depth: 3
number-sections: true
code-fold: true
code-tools: true
embed-resources: true
self-contained-math: true
execute:
echo: false
warning: false
message: false
params:
run_id: "final_run"
compare_run_id: null
output_root: "outputs"
figures_subdir: "figures_final_index"
tables_subdir: "analysis_tables"
cache_subdir: "analysis_cache"
save_fig_dpi: 300
raster_sample_n: 200000
centroid_sample_n: 50000
max_species_maps_in_report: 50
print_future_maps_in_report: true
topN_maps_for_each_section: 10000
---
```{r}
# ------------------------------------------------------------------------------
# Setup
# ------------------------------------------------------------------------------
suppressPackageStartupMessages ({
library (dplyr)
library (tidyr)
library (purrr)
library (stringr)
library (readr)
library (ggplot2)
library (terra)
})
project_root <- if (requireNamespace ("here" , quietly = TRUE )) here:: here () else getwd ()
run_dir <- file.path (project_root, params$ output_root, params$ run_id)
stats_dir <- file.path (run_dir, "models_stats" )
pred_cur <- file.path (run_dir, "predictions" , "current" )
pred_fut <- file.path (run_dir, "predictions" , "future" )
# ---- Future scenario discovery (robust; may be empty for older runs) ----
list_future_scenarios <- function (pred_fut_root = pred_fut) {
if (is.null (pred_fut_root) || ! dir.exists (pred_fut_root)) return (character (0 ))
sc <- list.dirs (pred_fut_root, recursive = FALSE , full.names = FALSE )
sc <- sc[nzchar (sc)]
sort (unique (sc))
}
future_scenarios <- list_future_scenarios (pred_fut)
cache_dir <- file.path (run_dir, "cache" )
biotic_dir <- file.path (run_dir, "biotic" )
fig_dir <- file.path (run_dir, params$ figures_subdir)
tab_dir <- file.path (run_dir, params$ tables_subdir)
ana_cache <- file.path (run_dir, params$ cache_subdir)
dir.create (fig_dir, recursive = TRUE , showWarnings = FALSE )
dir.create (tab_dir, recursive = TRUE , showWarnings = FALSE )
dir.create (ana_cache, recursive = TRUE , showWarnings = FALSE )
safe_read_csv <- function (path) {
if (! file.exists (path)) return (NULL )
tryCatch (readr:: read_csv (path, show_col_types = FALSE , progress = FALSE ),
error = function (e) NULL )
}
safe_read_rast <- function (path) {
if (! file.exists (path)) return (NULL )
tryCatch (terra:: rast (path), error = function (e) NULL )
}
haversine_km <- function (lon1, lat1, lon2, lat2) {
to_rad <- function (x) x * pi / 180
lon1 <- to_rad (lon1); lat1 <- to_rad (lat1)
lon2 <- to_rad (lon2); lat2 <- to_rad (lat2)
dlon <- lon2 - lon1
dlat <- lat2 - lat1
a <- sin (dlat/ 2 )^ 2 + cos (lat1) * cos (lat2) * sin (dlon/ 2 )^ 2
c <- 2 * atan2 (sqrt (a), sqrt (1 - a))
6371.0 * c
}
save_plot <- function (p, path, dpi = params$ save_fig_dpi, w = 9 , h = 5 ) {
dir.create (dirname (path), recursive = TRUE , showWarnings = FALSE )
ggsave (path, plot = p, dpi = dpi, width = w, height = h, units = "in" )
path
}
save_raster_png <- function (r, path, main = "" , dpi = params$ save_fig_dpi, w = 8 , h = 4.5 ) {
dir.create (dirname (path), recursive = TRUE , showWarnings = FALSE )
grDevices:: png (path, width = w, height = h, units = "in" , res = dpi)
terra:: plot (r, main = main)
grDevices:: dev.off ()
path
}
# Centroid/area from a suitability raster using sampling of suitable cells.
centroid_area_from_raster <- function (prob_r, thr, sample_n = params$ centroid_sample_n) {
if (is.null (prob_r) || ! is.finite (thr)) return (NULL )
suit <- terra:: ifel (prob_r >= thr, 1 , NA )
n_suit <- tryCatch (as.numeric (terra:: global (suit, "sum" , na.rm = TRUE )[1 ,1 ]), error = function (e) 0 )
if (! is.finite (n_suit) || n_suit <= 0 ) return (list (area_km2 = 0 , lon = NA_real_ , lat = NA_real_ ))
cs <- terra:: cellSize (prob_r, unit = "km" )
area_km2 <- as.numeric (terra:: global (cs * suit, "sum" , na.rm = TRUE )[1 ,1 ])
pts <- tryCatch (
terra:: spatSample (suit, size = sample_n, method = "random" , na.rm = TRUE ,
xy = TRUE , values = FALSE ),
error = function (e) NULL
)
if (is.null (pts) || nrow (pts) < 10 ) return (list (area_km2 = area_km2, lon = NA_real_ , lat = NA_real_ ))
list (area_km2 = area_km2, lon = mean (pts[,1 ]), lat = mean (pts[,2 ]))
}
# Prediction file helpers (robust to new + legacy layouts) ----------------------
fish_model_dir <- function (model_type = c ("EnvOnly" ,"HostOnly" ,"Combined" )) {
model_type <- match.arg (model_type)
c (EnvOnly = "fish_env_only" ,
HostOnly = "fish_host_only" ,
Combined = "fish_combined" )[[model_type]]
}
.list_species_from_dir <- function (dir) {
if (is.null (dir) || ! dir.exists (dir)) return (character ())
# Prefer ensemble/mean rasters (handles both flat and nested layouts)
cand <- list.files (
dir,
pattern = "(?i)(^|/)(.*?)(?:_)?(ensemble_mean|mean)(?:_prob)? \\ .(tif|tiff)$" ,
recursive = TRUE ,
full.names = TRUE
)
# Fallback: any tif (still try to infer species)
if (length (cand) == 0 ) {
cand <- list.files (
dir,
pattern = "(?i) \\ .(tif|tiff)$" ,
recursive = TRUE ,
full.names = TRUE
)
}
if (length (cand) == 0 ) return (character ())
base <- basename (cand)
stem <- sub ("(?i) \\ .(tif|tiff)$" , "" , base, perl = TRUE )
# Remove common suffixes; if the name becomes generic, take parent folder as species
sp <- stem |>
str_remove (regex ("(_)?(ensemble_)?mean(_prob)?$" , ignore_case = TRUE )) |>
str_remove (regex ("(_)?prob$" , ignore_case = TRUE )) |>
str_remove (regex ("(_)?pred(iction)?$" , ignore_case = TRUE ))
generic <- is.na (sp) | ! nzchar (sp) | tolower (sp) %in% c ("mean" , "ensemble" , "ensemble_mean" )
if (any (generic)) {
sp_parent <- basename (dirname (cand))
sp[generic] <- sp_parent[generic]
}
sp <- sp[! is.na (sp) & nzchar (sp)]
unique (sp)
}
.list_species_from_flat_dir <- function (dir, pattern = NULL ) {
sp <- .list_species_from_dir (dir)
if (! is.null (pattern)) sp <- sp[grepl (pattern, sp)]
sp
}
list_fish_species_by_model <- function (pred_dir = pred_cur) {
dirs <- list (
EnvOnly = file.path (pred_dir, "fish_env_only" ),
HostOnly = file.path (pred_dir, "fish_host_only" ),
Combined = file.path (pred_dir, "fish_combined" )
)
out <- purrr:: imap (dirs, \(d, nm) .list_species_from_dir (d))
# Legacy fallback: predictions/current/fish/<species>/ensemble_mean_prob.tif
if (all (lengths (out) == 0 L)) {
legacy_root <- file.path (pred_dir, "fish" )
if (dir.exists (legacy_root)) {
sp <- list.dirs (legacy_root, full.names = FALSE , recursive = FALSE )
out <- list (EnvOnly = sp, HostOnly = character (), Combined = character ())
}
}
out
}
list_fish_species <- function (pred_dir = pred_cur) {
sets <- list_fish_species_by_model (pred_dir)
sort (unique (unlist (sets, use.names = FALSE )))
}
find_fish_raster <- function (
species,
model_type = c ("EnvOnly" ,"HostOnly" ,"Combined" ),
which = c ("current" ,"future" ),
scenario = NULL ,
stat = c ("mean" ,"sd" )
) {
model_type <- match.arg (model_type)
which <- match.arg (which)
stat <- match.arg (stat)
subdir <- fish_model_dir (model_type)
suffix <- if (stat == "mean" ) "_mean.tif" else "_sd.tif"
# New layout
f <- if (which == "current" ) {
file.path (pred_cur, subdir, paste0 (species, suffix))
} else {
if (is.null (scenario)) stop ("scenario required for future fish raster" )
file.path (pred_fut, scenario, subdir, paste0 (species, suffix))
}
if (file.exists (f)) return (f)
# Legacy fallback
if (which == "current" && stat == "mean" ) {
f2 <- file.path (pred_cur, "fish" , species, "ensemble_mean_prob.tif" )
if (file.exists (f2)) return (f2)
}
if (which == "future" && stat == "mean" && ! is.null (scenario)) {
f2 <- file.path (pred_fut, scenario, "fish" , species, "ensemble_mean_prob.tif" )
if (file.exists (f2)) return (f2)
}
NULL
}
# Interaction matrix (optional; for specialist/generalist grouping)
interaction_path <- file.path (project_root, "data" , "interaction_matrix.csv" )
interaction <- NULL
if (file.exists (interaction_path)) {
interaction <- tryCatch ({
m <- utils:: read.csv (interaction_path, row.names = 1 , check.names = FALSE )
m <- as.matrix (m)
rownames (m) <- gsub (" \\ ." , "_" , rownames (m))
colnames (m) <- gsub (" \\ ." , "_" , colnames (m))
m
}, error = function (e) NULL )
}
get_n_hosts <- function (sp, interaction) {
if (is.null (interaction)) return (NA_integer_ )
if (is.matrix (interaction) || is.data.frame (interaction)) {
rn <- rownames (interaction)
if (is.null (rn) || ! (sp %in% rn)) return (NA_integer_ )
return (sum (interaction[sp, , drop = TRUE ] > 0 , na.rm = TRUE ))
}
if (is.list (interaction)) {
v <- interaction[[sp]]
if (is.null (v)) return (NA_integer_ )
return (sum (v > 0 , na.rm = TRUE ))
}
NA_integer_
}
fish_species_sets <- list_fish_species_by_model (pred_cur)
fish_groups <- tibble:: tibble (species = list_fish_species (pred_cur)) |>
dplyr:: mutate (
n_hosts = purrr:: map_int (species, \(sp) get_n_hosts (sp, interaction)),
group = dplyr:: case_when (
is.na (n_hosts) ~ "unknown" ,
n_hosts >= 3 ~ "generalist" ,
TRUE ~ "specialist"
)
)
readr:: write_csv (fish_groups, file.path (tab_dir, "fish_groups.csv" ))
# Concrete species vector used throughout this report (may be empty).
fish_species <- fish_groups$ species
if (is.null (fish_species)) fish_species <- character (0 )
# Helper: get group label for a fish species.
fish_group_of <- function (sp) {
g <- fish_groups$ group[match (sp, fish_groups$ species)]
if (length (g) == 0 || is.na (g)) return ("unknown" )
as.character (g)
}
# ------------------------------------------------------------------------------
# GLOBAL SPECIES FILTERING (Quality Control)
# ------------------------------------------------------------------------------
# Based on pipeline logs, these species failed strict spatial blocking or had insufficient data.
# They are excluded from ALL post-analysis to ensure robust comparisons.
excluded_species <- c (
"Amphiprion_chagosensis" , # Range too small for spatial blocking
"Amphiprion_mccullochi" , # Data deficiency (<15 points)
"Amphiprion_omanensis" , # Sample size too small after cleaning
"Amphiprion_barberi" , # Background sampling failed (small range)
"Amphiprion_latezonatus" , # Spatial blocking failed
"Amphiprion_clarkii" , # Spatial blocking failed
"Premnas_biaculeatus" , # Spatial blocking failed
"Amphiprion_bicinctus" , # Partial failure (failed Combined model) & Geometric artifact
"Amphiprion_sandaracinos" , # Partial failure (failed Combined/HostOnly)
"Amphiprion_perideraion" , # Partial failure (failed Combined/HostOnly)
"Amphiprion_sebae" # AUC/TSS too low
)
# Filter the master species list
original_count <- length (fish_species)
fish_species <- setdiff (fish_species, excluded_species)
fish_groups <- fish_groups %>% filter (species %in% fish_species)
cat (sprintf (" \n --- QUALITY CONTROL FILTER --- \n Original Species: %d \n Excluded: %d \n Final Analysis Set: %d \n " ,
original_count, length (excluded_species), length (fish_species)))
cat ("Excluded Species:" )
print (excluded_species)
available_models <- c ("EnvOnly" ,"HostOnly" ,"Combined" )
```
# Purpose of this document
This Quarto document is the analysis and figure-production layer for my thesis. It is designed to:
- load SDM outputs from `outputs/{run_id}/`
- summarise and compare fish model performance across three formulations\
**A** (EnvOnly), **B** (HostOnly), **C** (Combined)
- quantify **ghost habitat** as the spatial gap between climatic suitability and host-constrained suitability
- quantify **range shifts** and the **biotic constraint on expansion** under future scenarios
- save all figures to a clean directory structure for later transfer to LaTeX
If an expected file is missing (e.g., host predictions not yet generated), the corresponding analyses will skip and the rest of the report will still render.
# Methods
## Predictor sets and model definitions
Environmental predictors are principal components of the marine environment (PC1–PC4). Rugosity is included as a static predictor representing habitat structure.
Fish models are fit in three formulations:
- **EnvOnly (A):** PC1–PC4 + rugosity\
- **HostOnly (B):** rugosity + fish-specific biotic host index\
- **Combined (C):** PC1–PC4 + rugosity + fish-specific biotic host index
The **biotic host index** is constructed from host anemone predictions (mean probability surfaces). If an interaction matrix is available, host predictions are weighted by the fish–host association strength and summed.
## Why background bias uses fewer PCs (`BG_ENV_VARS`)
Background points are sampled using a bias-aware strategy: candidate background points are drawn from the modelling domain and then reweighted toward **environmental similarity** to presence points in a reduced environmental subspace:
``` r
BG_ENV_VARS <- c ("PC1" ,"PC2" )
```
Using only PC1–PC2 is typically more defensible than using PC1–PC4 for bias correction because:
- PC1–PC2 usually represent the strongest, smoothest gradients; they stabilise bias weighting.
- Higher PCs are often noisier or more local; including them can over-condition the background and reduce transferability.
- Bias correction is about *sampling effort*, not the full ecological response.
It is valid to expand `BG_ENV_VARS` to PC1–PC4 if PC3/PC4 represent strong, interpretable gradients and you can defend that they reflect sampling bias.
## Spatial cross-validation and repeated evaluation
Model performance is assessed with spatially blocked holdout, repeated across bootstrap iterations. Metrics summarised here include AUC, TSS, sensitivity, specificity, log-loss, Brier score, and Boyce index.
## Ensemble predictions
For each species/model type, multiple bootstrap models are projected to raster space; the mean and standard deviation of predicted probability are saved and used in post-analysis.
# Data checks
```{r}
checks <- tibble:: tibble (
item = c ("run_dir" ,"stats_dir" ,"pred_current" ,"pred_future" ,"cache_dir" ,"biotic_dir" ),
path = c (run_dir, stats_dir, pred_cur, pred_fut, cache_dir, biotic_dir),
exists = file.exists (c (run_dir, stats_dir, pred_cur, pred_fut, cache_dir, biotic_dir))
)
checks
```
# Model performance
## Load per-iteration statistics
```{r}
read_stats_glob <- function (stats_dir, pattern) {
files <- list.files (stats_dir, pattern = pattern, full.names = TRUE )
if (length (files) == 0 ) return (NULL )
tbls <- map (files, safe_read_csv)
tbls <- tbls[! vapply (tbls, is.null, logical (1 ))]
if (length (tbls) == 0 ) return (NULL )
bind_rows (tbls, .id = "file_id" ) |>
mutate (file = basename (files[as.integer (file_id)])) |>
dplyr:: select (- file_id)
}
fish_stats <- read_stats_glob (stats_dir, "^.*_Fish(EnvOnly|HostOnly|Combined)_stats \\ .csv$" )
host_stats <- read_stats_glob (stats_dir, "^.*_Host_stats \\ .csv$" )
if (! is.null (fish_stats)) {
fish_stats <- fish_stats |>
mutate (
species = if ("species" %in% names (fish_stats)) species else str_replace (file, "_Fish.*$" , "" ),
model_type = if ("model_type" %in% names (fish_stats)) model_type else str_match (file, "_Fish(.*)_stats" )[,2 ]
) |>
left_join (fish_groups, by = "species" )
# Fail-safe: if a metric column is missing in older runs, create it as NA
needed <- c ("auc" ,"tss" ,"boyce" ,"brier" ,"logloss" ,"threshold_maxTSS" ,"sensitivity" ,"specificity" )
for (cl in needed) if (! (cl %in% names (fish_stats))) fish_stats[[cl]] <- NA_real_
}
if (! is.null (host_stats)) {
host_stats <- host_stats |>
mutate (species = if ("species" %in% names (host_stats)) species else str_replace (file, "_Host_stats \\ .csv$" , "" ))
# Fail-safe for older runs
needed_h <- c ("auc" ,"tss" ,"boyce" ,"brier" ,"logloss" )
for (cl in needed_h) if (! (cl %in% names (host_stats))) host_stats[[cl]] <- NA_real_
}
compare_fish_stats <- NULL
if (! is.null (params$ compare_run_id)) {
compare_dir <- file.path (project_root, params$ output_root, params$ compare_run_id, "models_stats" )
if (dir.exists (compare_dir)) {
compare_fish_stats <- read_stats_glob (compare_dir, "^.*_Fish(EnvOnly|HostOnly|Combined)_stats \\ .csv$" )
if (! is.null (compare_fish_stats)) {
compare_fish_stats <- compare_fish_stats |>
mutate (
species = if ("species" %in% names (compare_fish_stats)) species else str_replace (file, "_Fish.*$" , "" ),
model_type = if ("model_type" %in% names (compare_fish_stats)) model_type else str_match (file, "_Fish(.*)_stats" )[,2 ],
run = params$ compare_run_id
)
}
}
}
if (! is.null (fish_stats)) fish_stats$ run <- params$ run_id
list (
fish_rows = if (is.null (fish_stats)) 0 else nrow (fish_stats),
host_rows = if (is.null (host_stats)) 0 else nrow (host_stats),
fish_species = if (is.null (fish_stats)) 0 else n_distinct (fish_stats$ species),
compare_loaded = ! is.null (compare_fish_stats)
)
```
## Summary tables
```{r}
summarise_fish <- function (df) {
if (is.null (df)) return (NULL )
df |>
group_by (run, species, model_type, group) |>
summarise (
n_iter = n (),
auc_mean = mean (auc, na.rm = TRUE ),
auc_sd = sd (auc, na.rm = TRUE ),
tss_mean = mean (tss, na.rm = TRUE ),
tss_sd = sd (tss, na.rm = TRUE ),
boyce_mean = mean (boyce, na.rm = TRUE ),
brier_mean = mean (brier, na.rm = TRUE ),
logloss_mean = mean (logloss, na.rm = TRUE ),
thr_mean = mean (threshold_maxTSS, na.rm = TRUE ),
sens_mean = mean (sensitivity, na.rm = TRUE ),
spec_mean = mean (specificity, na.rm = TRUE ),
.groups = "drop"
)
}
fish_summary <- summarise_fish (fish_stats)
if (! is.null (fish_summary)) write_csv (fish_summary, file.path (tab_dir, "fish_performance_summary.csv" ))
fish_summary |> arrange (desc (tss_mean)) |> head (20 )
```
## Host model performance (quality check)
```{r}
if (! is.null (host_stats)) {
host_summary <- host_stats |>
group_by (species) |>
summarise (
n_iter = n (),
auc_mean = mean (auc, na.rm = TRUE ),
tss_mean = mean (tss, na.rm = TRUE ),
boyce_mean = mean (boyce, na.rm = TRUE ),
brier_mean = mean (brier, na.rm = TRUE ),
logloss_mean = mean (logloss, na.rm = TRUE ),
.groups = "drop"
)
write_csv (host_summary, file.path (tab_dir, "host_performance_summary.csv" ))
host_summary |> arrange (desc (tss_mean)) |> head (20 )
} else {
tibble:: tibble (note = "No host stats found for this run." )
}
```
## Distributions of performance metrics
```{r}
## Distributions of performance metrics
# Performance boxplots (fail-safe) with consistent coloring
safe_plot_perf_box <- function (df, metric, ylab, title, out_path) {
if (is.null (df) || nrow (df) == 0 ) return (invisible (NULL ))
req <- c ("model_type" , "group" , metric)
if (! all (req %in% names (df))) return (invisible (NULL ))
df2 <- df |>
mutate (
# Ensure factor levels match the color mapping keys
model_type = factor (model_type, levels = c ("EnvOnly" , "HostOnly" , "Combined" )),
group = as.factor (group),
.metric = suppressWarnings (as.numeric (.data[[metric]]))
) |>
filter (is.finite (.metric) & ! is.na (model_type) & ! is.na (group))
if (nrow (df2) < 5 || dplyr:: n_distinct (df2$ model_type) < 1 ) {
message ("Skipping " , metric, " boxplot (insufficient non-missing values)." )
return (invisible (NULL ))
}
# Define colors to match extension code
model_colors <- c ("EnvOnly" = "#E69F00" ,
"HostOnly" = "#56B4E9" ,
"Combined" = "#009E73" )
p <- ggplot (df2, aes (x = model_type, y = .metric, fill = model_type)) +
geom_boxplot (outlier.alpha = 0.2 , na.rm = TRUE , alpha = 0.8 ) +
scale_fill_manual (values = model_colors) +
facet_wrap (~ group, nrow = 1 ) +
labs (x = NULL , y = ylab, fill = "Model Type" ) +
theme_minimal () +
theme (
legend.position = "none" , # Hide legend if x-axis labels are sufficient, or "bottom"
axis.text.x = element_text (angle = 45 , hjust = 1 )
)
print (p)
save_plot (p, out_path)
invisible (p)
}
if (! is.null (fish_stats) && nrow (fish_stats) > 0 ) {
safe_plot_perf_box (
fish_stats, "auc" ,
"AUC (blocked holdout)" ,
"Fish model performance (AUC)" ,
file.path (fig_dir, "01_performance" , "fish_auc_by_model.png" )
)
safe_plot_perf_box (
fish_stats, "tss" ,
"TSS (blocked holdout)" ,
"Fish model performance (TSS)" ,
file.path (fig_dir, "01_performance" , "fish_tss_by_model.png" )
)
safe_plot_perf_box (
fish_stats, "boyce" ,
"Boyce index" ,
"Fish model performance (Boyce)" ,
file.path (fig_dir, "01_performance" , "fish_boyce_by_model.png" )
)
}
```
## Future fish maps (saved; optional printing)
```{r}
for (sc in future_scenarios) {
for (sp in fish_species) {
grp <- fish_group_of (sp)
for (mt in available_models) {
p <- find_fish_raster (sp, mt, "future" , scenario = sc)
if (is.null (p)) next
r <- safe_read_rast (p)
if (is.null (r)) next
out_png <- file.path (fig_dir, "02_maps" , "future" , sc, grp, sp, sprintf ("%s_%s_mean.png" , mt, sc))
save_raster_png (r, out_png, main = sprintf ("%s — %s (%s mean)" , sp, mt, sc))
if (isTRUE (params$ print_future_maps_in_report)) {
cat (sprintf (" \n\n ## %s (%s) \n\n ### %s — %s mean \n\n " , sp, grp, mt, sc))
knitr:: include_graphics (out_png) |> print ()
}
}
}
}
```
# Ghost habitat
Ghost habitat is defined as areas suitable under **EnvOnly** but not suitable under **Combined**, using each model’s mean `threshold_maxTSS` across iterations when available.
```{r}
# ==============================================================================
# 2. GHOST HABITAT ANALYSIS (ALL SCENARIOS) - NAMESPACED
# ==============================================================================
# Ensure background map data exists
if (! exists ("world_land" )) {
# Load land if missing
if (requireNamespace ("rnaturalearth" , quietly= TRUE )) {
world_land <- rnaturalearth:: ne_countries (scale = "medium" , returnclass = "sf" )
}
}
# Helper for plotting Ghost Habitat (Explicit Namespaces)
plot_ghost_map <- function (r, title_text) {
ggplot2:: ggplot () +
# Add Land (Gray)
ggplot2:: geom_sf (data = world_land, fill = "gray80" , color = "white" , size = 0.1 ) +
# Add Ghost Raster (Red for Ghost Habitat)
tidyterra:: geom_spatraster (data = r) +
ggplot2:: scale_fill_gradient (
low = "#D73027" , high = "#D73027" , # Red for "danger/loss"
na.value = "transparent" ,
name = "Ghost Habitat"
) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) +
ggplot2:: labs (title = title_text) +
ggplot2:: theme_minimal () +
ggplot2:: theme (
axis.text = ggplot2:: element_blank (),
axis.ticks = ggplot2:: element_blank (),
panel.grid = ggplot2:: element_blank (),
plot.title = ggplot2:: element_text (face = "bold" , hjust = 0.5 )
)
}
ghost_tbl <- NULL
if (! is.null (fish_summary)) {
# Get thresholds from summary (EnvOnly/Combined means)
thr_tbl <- fish_summary %>%
group_by (species, model_type) %>%
summarise (thr = mean (thr_mean, na.rm = TRUE ), .groups= "drop" )
ghost_rows <- list ()
scenarios_to_run <- c ("current" , future_scenarios)
cat (" \n --- Calculating Ghost Habitat for All Scenarios --- \n " )
for (sp in fish_species) {
grp <- fish_groups$ group[fish_groups$ species == sp]
# Get Species-Specific Thresholds
tA <- thr_tbl$ thr[thr_tbl$ species== sp & thr_tbl$ model_type== "EnvOnly" ]
tC <- thr_tbl$ thr[thr_tbl$ species== sp & thr_tbl$ model_type== "Combined" ]
if (length (tA)== 0 ) tA <- 0.5
if (length (tC)== 0 ) tC <- 0.5
for (sc in scenarios_to_run) {
# 1. Load Rasters
pA <- find_fish_raster (sp, "EnvOnly" , if (sc== "current" ) "current" else "future" , scenario = if (sc!= "current" ) sc else NULL )
pC <- find_fish_raster (sp, "Combined" , if (sc== "current" ) "current" else "future" , scenario = if (sc!= "current" ) sc else NULL )
if (is.null (pA) || is.null (pC)) next
rA <- safe_read_rast (pA)
rC <- safe_read_rast (pC)
if (is.null (rA) || is.null (rC)) next
# 2. Binary Suitability
suitA <- terra:: ifel (rA >= tA, 1 , 0 )
suitC <- terra:: ifel (rC >= tC, 1 , 0 )
# 3. Align Extents
if (! terra:: compareGeom (suitA, suitC, stopOnError= FALSE )) {
suitC <- terra:: resample (suitC, suitA, method= "near" )
suitC <- terra:: mask (suitC, suitA)
}
# 4. Calculate Ghost Habitat
ghost <- terra:: ifel ((suitA == 1 ) & (suitC == 0 ), 1 , NA )
names (ghost) <- "ghost_habitat"
# 5. Calculate Stats
cs <- terra:: cellSize (rA, unit= "km" )
ghost_km2 <- as.numeric (terra:: global (cs * (terra:: ifel (is.na (ghost), 0 , 1 )), "sum" , na.rm= TRUE )[1 ,1 ])
pot_km2 <- as.numeric (terra:: global (cs * suitA, "sum" , na.rm= TRUE )[1 ,1 ])
ghost_rows[[length (ghost_rows)+ 1 ]] <- tibble (
species = sp, group = grp, scenario = sc,
potential_km2 = pot_km2, ghost_km2 = ghost_km2,
pct_ghost = ifelse (pot_km2 > 0 , (ghost_km2 / pot_km2) * 100 , 0 )
)
# 6. Save Map
out_png <- file.path (fig_dir, "04_ghost_habitat" , sc, grp, sp, "ghost.png" )
dir.create (dirname (out_png), recursive= TRUE , showWarnings= FALSE )
p_ghost <- plot_ghost_map (ghost, paste0 (gsub ("_" , " " , sp), " - Ghost (" , sc, ")" ))
ggsave (out_png, p_ghost, width= 8 , height= 5 , bg= "white" )
print (p_ghost)
}
}
# 7. Save Summary Table
ghost_df <- bind_rows (ghost_rows)
if (nrow (ghost_df) > 0 ) {
write_csv (ghost_df, file.path (tab_dir, "ghost_habitat_summary_all_scenarios.csv" ))
# --- FIX LABELS START ---
ghost_df <- ghost_df |>
mutate (scenario = str_replace_all (scenario, "ssp( \\ d)( \\ d)( \\ d)_( \\ d+)" , "SSP \\ 1- \\ 2. \\ 3 \\ 4" ))
# --- FIX LABELS END ---
# 8. Plot Trends (Namespaced)
p_ghost_trend <- ggplot2:: ggplot (ghost_df, ggplot2:: aes (x= scenario, y= ghost_km2, fill= group)) +
ggplot2:: geom_boxplot (outlier.alpha = 0.5 ) +
ggplot2:: facet_wrap (~ group, scales= "free_y" ) +
ggplot2:: scale_fill_manual (values= c ("generalist" = "#E69F00" , "specialist" = "#56B4E9" )) +
ggplot2:: theme_minimal () +
ggplot2:: theme (axis.text.x = ggplot2:: element_text (angle= 45 , hjust= 1 )) +
ggplot2:: labs (y= expression (Area~ (km^ 2 )))
ggsave (file.path (fig_dir, "04_ghost_habitat" , "ghost_area_trend.png" ), p_ghost_trend, w= 10 , h= 6 , bg= "white" )
print (p_ghost_trend)
}
}
```
```{r}
if (! is.null (ghost_tbl)) {
p <- ggplot (ghost_tbl, aes (x = group, y = ghost_km2)) +
geom_boxplot (outlier.alpha = 0.2 ) +
labs (x = NULL , y = "Ghost habitat area (km²)" )
print (p)
save_plot (p, file.path (fig_dir, "04_ghost_habitat" , "ghost_area_by_group.png" ))
# include top N ghost maps in the report
topN <- params$ topN_maps_for_each_section
top <- ghost_tbl |> arrange (desc (ghost_km2)) |> head (topN)
for (i in seq_len (nrow (top))) {
sp <- top$ species[i]
grp <- top$ group[i]
png <- file.path (fig_dir, "04_ghost_habitat" , grp, sp, "ghost_current.png" )
if (file.exists (png)) {
cat (sprintf (" \n\n ## Ghost habitat map — %s (%s) \n\n " , sp, grp))
knitr:: include_graphics (png) |> print ()
}
}
}
```
# Host–fish niche overlap and coupling
This section quantifies host–fish spatial coupling using niche overlap metrics. For each fish–host association (from the interaction matrix, if available), we compute overlap between fish suitability surfaces and host suitability surfaces.
We report two overlap types:
- **Fish climatic niche vs host niche:** EnvOnly (fish) vs host ENM\
- **Fish realised niche vs host niche:** Combined (fish) vs host ENM
Overlap is computed on a random sample of raster cells where both rasters are defined, using:
- **Schoener’s D:** ( D = 1 - \frac{1}{2}\sum \| p_1 - p_2\| )\
- **Hellinger’s I:** ( I = \sum \sqrt{p_1 p_2} )
where (p_1) and (p_2) are normalised suitability weights (sum to 1 over sampled cells).
```{r}
# ------------------------------------------------------------------------------
# Overlap utilities
# ------------------------------------------------------------------------------
schoener_D <- function (v1, v2) {
v1 <- pmax (v1, 0 ); v2 <- pmax (v2, 0 )
s1 <- sum (v1, na.rm = TRUE ); s2 <- sum (v2, na.rm = TRUE )
if (! is.finite (s1) || ! is.finite (s2) || s1 <= 0 || s2 <= 0 ) return (NA_real_ )
p1 <- v1 / s1; p2 <- v2 / s2
1 - 0.5 * sum (abs (p1 - p2), na.rm = TRUE )
}
hellinger_I <- function (v1, v2) {
v1 <- pmax (v1, 0 ); v2 <- pmax (v2, 0 )
s1 <- sum (v1, na.rm = TRUE ); s2 <- sum (v2, na.rm = TRUE )
if (! is.finite (s1) || ! is.finite (s2) || s1 <= 0 || s2 <= 0 ) return (NA_real_ )
p1 <- v1 / s1; p2 <- v2 / s2
sum (sqrt (p1 * p2), na.rm = TRUE )
}
sample_pair_values <- function (r1, r2, n = params$ raster_sample_n) {
if (is.null (r1) || is.null (r2)) return (NULL )
if (! terra:: compareGeom (r1, r2, stopOnError = FALSE )) {
r2 <- terra:: resample (r2, r1, method = "bilinear" )
}
mask <- terra:: ifel (! is.na (r1) & ! is.na (r2), 1 , NA )
pts <- tryCatch (terra:: spatSample (mask, size = n, method = "random" , na.rm = TRUE ,
xy = TRUE , values = FALSE ),
error = function (e) NULL )
if (is.null (pts) || nrow (pts) < 200 ) return (NULL )
vals1 <- base:: tryCatch (terra:: extract (r1, pts), error = function (e) NULL )
vals2 <- base:: tryCatch (terra:: extract (r2, pts), error = function (e) NULL )
if (base:: is.null (vals1) || base:: is.null (vals2)) return (NULL )
v1 <- vals1[[base:: ncol (vals1)]]
v2 <- vals2[[base:: ncol (vals2)]]
ok <- base:: is.finite (v1) & base:: is.finite (v2)
if (base:: sum (ok) < 200 ) return (NULL )
list (v1 = v1[ok], v2 = v2[ok])
}
find_host_raster <- function (
host,
which = c ("current" ,"future" ),
scenario = NULL ,
stat = c ("mean" ,"sd" )
) {
which <- match.arg (which)
stat <- match.arg (stat)
suffix <- if (stat == "mean" ) "_mean.tif" else "_sd.tif"
# New layout
f <- if (which == "current" ) {
file.path (pred_cur, "hosts" , paste0 (host, suffix))
} else {
if (is.null (scenario)) stop ("scenario required for future host raster" )
file.path (pred_fut, scenario, "hosts" , paste0 (host, suffix))
}
if (file.exists (f)) return (f)
# Legacy fallback (mean only)
if (stat == "mean" ) {
f2 <- if (which == "current" ) {
file.path (pred_cur, "hosts" , host, "current_mean_prob.tif" )
} else {
file.path (pred_fut, scenario, "hosts" , host, "future_mean_prob.tif" )
}
if (file.exists (f2)) return (f2)
}
NULL
}
# Association list (fish × host pairs with w>0) --------------------------------
interaction_long <- function (interaction) {
if (is.null (interaction)) return (NULL )
# Matrix / wide form: rownames = fish, colnames = hosts
if (is.matrix (interaction) || (is.data.frame (interaction) && ! any (c ("fish" ,"host" ) %in% names (interaction)))) {
m <- as.matrix (interaction)
if (is.null (rownames (m)) || is.null (colnames (m))) return (NULL )
df <- as.data.frame (as.table (m), stringsAsFactors = FALSE )
names (df) <- c ("fish" ,"host" ,"w" )
df <- dplyr:: filter (df, ! is.na (w), w > 0 )
return (df)
}
# Long form: columns fish, host, w (or any numeric weight column)
if (is.data.frame (interaction) && all (c ("fish" ,"host" ) %in% names (interaction))) {
df <- interaction
if (! ("w" %in% names (df))) {
w_col <- setdiff (names (df), c ("fish" ,"host" ))[1 ]
if (! is.null (w_col)) df$ w <- df[[w_col]]
}
if (! ("w" %in% names (df))) return (NULL )
df <- dplyr:: filter (df, ! is.na (w), w > 0 )
return (df)
}
NULL
}
assoc <- interaction_long (interaction)
if (is.null (assoc) || nrow (assoc) == 0 ) {
message ("No interaction pairs available (interaction matrix missing or empty). Skipping pair overlap." )
assoc <- tibble:: tibble (fish = character (), host = character (), w = numeric ())
}
overlap_rows <- list ()
if (! is.null (assoc) && nrow (assoc) > 0 ) {
# define scenarios to compute (include "current")
sc_all <- c ("current" , future_scenarios)
for (sc in sc_all) {
for (i in seq_len (nrow (assoc))) {
fish <- assoc$ fish[i]
host <- assoc$ host[i]
w <- assoc$ w[i]
# fish rasters (EnvOnly and Combined)
f_env_path <- if (sc == "current" ) find_fish_raster (fish, "EnvOnly" , "current" ) else find_fish_raster (fish, "EnvOnly" , "future" , scenario = sc)
f_com_path <- if (sc == "current" ) find_fish_raster (fish, "Combined" , "current" ) else find_fish_raster (fish, "Combined" , "future" , scenario = sc)
h_path <- if (sc == "current" ) find_host_raster (host, "current" ) else find_host_raster (host, "future" , scenario = sc)
if (is.null (f_env_path) || is.null (f_com_path) || is.null (h_path)) next
f_env <- safe_read_rast (f_env_path)
f_com <- safe_read_rast (f_com_path)
h_r <- safe_read_rast (h_path)
if (is.null (f_env) || is.null (f_com) || is.null (h_r)) next
# sample and compute overlap
sam1 <- sample_pair_values (f_env, h_r, n = params$ raster_sample_n)
sam2 <- sample_pair_values (f_com, h_r, n = params$ raster_sample_n)
if (is.null (sam1) || is.null (sam2)) next
overlap_rows[[length (overlap_rows)+ 1 ]] <- tibble:: tibble (
scenario = sc,
fish = fish,
host = host,
weight = w,
group = fish_groups$ group[fish_groups$ species == fish] |> as.character (),
D_env_host = schoener_D (sam1$ v1, sam1$ v2),
I_env_host = hellinger_I (sam1$ v1, sam1$ v2),
D_comb_host = schoener_D (sam2$ v1, sam2$ v2),
I_comb_host = hellinger_I (sam2$ v1, sam2$ v2)
)
}
}
}
overlap_tbl <- if (length (overlap_rows) > 0 ) bind_rows (overlap_rows) else NULL
if (! is.null (overlap_tbl)) write_csv (overlap_tbl, file.path (tab_dir, "host_fish_overlap.csv" ))
overlap_tbl |> head (20 )
```
```{r}
if (! is.null (overlap_tbl)) {
# Change relative to current
cur <- overlap_tbl |> filter (scenario == "current" ) |> dplyr:: select (fish, host, D_env_host, D_comb_host)
delta <- overlap_tbl |>
filter (scenario != "current" ) |>
left_join (cur, by = c ("fish" ,"host" ), suffix = c ("_fut" ,"_cur" )) |>
mutate (
delta_D_env = D_env_host_fut - D_env_host_cur,
delta_D_comb = D_comb_host_fut - D_comb_host_cur
)
write_csv (delta, file.path (tab_dir, "host_fish_overlap_deltas.csv" ))
# --- FIX LABELS START ---
# Converts ssp585_2050 -> SSP5-8.5 2050
delta <- delta |>
mutate (scenario = str_replace_all (scenario, "ssp( \\ d)( \\ d)( \\ d)_( \\ d+)" , "SSP \\ 1- \\ 2. \\ 3 \\ 4" ))
# --- FIX LABELS END ---
# Define consistent colors
my_colors <- c ("generalist" = "#E69F00" , "specialist" = "#56B4E9" )
# Plot 1: EnvOnly Delta
p1 <- ggplot (delta, aes (x = group, y = delta_D_env, fill = group)) +
geom_boxplot (outlier.alpha = 0.2 ) +
facet_wrap (~ scenario) +
scale_fill_manual (values = my_colors) +
labs (x = NULL , y = "Δ Schoener's D (EnvOnly fish vs host)" ) +
theme_minimal () +
theme (legend.position = "none" )
print (p1)
save_plot (p1, file.path (fig_dir, "03_overlap" , "delta_D_env_host.png" ), w = 11 , h = 6 )
# Plot 2: Combined Delta (The important one)
p2 <- ggplot (delta, aes (x = group, y = delta_D_comb, fill = group)) +
geom_boxplot (outlier.alpha = 0.2 ) +
facet_wrap (~ scenario) +
scale_fill_manual (values = my_colors) +
labs (x = NULL , y = "Δ Schoener's D (Combined fish vs host)" ) +
theme_minimal () +
theme (legend.position = "none" )
print (p2)
save_plot (p2, file.path (fig_dir, "03_overlap" , "delta_D_comb_host.png" ), w = 11 , h = 6 )
# Variance / heterogeneity table
var_tbl <- delta |>
group_by (scenario, group) |>
summarise (
var_delta_env = var (delta_D_env, na.rm = TRUE ),
var_delta_comb = var (delta_D_comb, na.rm = TRUE ),
n = n (),
.groups = "drop"
)
write_csv (var_tbl, file.path (tab_dir, "overlap_change_variance.csv" ))
var_tbl
}
```
# Range shifts and biotic constraints
We summarise shifts using centroid displacement (km) between current and each future scenario, separately for EnvOnly and Combined. The **biotic constraint** is:
Delta = Shift-EnvOnly - ShiftCombined
Positive values indicate that hosts constrain the magnitude of projected range shift.
```{r}
shift_tbl <- NULL
if (! is.null (fish_summary) && length (future_scenarios) > 0 ) {
thr_tbl <- fish_summary |> dplyr:: select (species, model_type, thr_mean)
shift_rows <- list ()
for (sp in fish_species) {
grp <- fish_group_of (sp)
for (mt in c ("EnvOnly" ,"Combined" )) {
thr <- thr_tbl |> filter (species == sp, model_type == mt) |> pull (thr_mean)
thr <- if (length (thr) == 0 || ! is.finite (thr)) 0.5 else thr
p_cur <- find_fish_raster (sp, mt, "current" )
if (is.null (p_cur)) next
r_cur <- safe_read_rast (p_cur)
if (is.null (r_cur)) next
cur_ca <- centroid_area_from_raster (r_cur, thr)
for (sc in future_scenarios) {
p_fut <- find_fish_raster (sp, mt, "future" , scenario = sc)
if (is.null (p_fut)) next
r_fut <- safe_read_rast (p_fut)
if (is.null (r_fut)) next
fut_ca <- centroid_area_from_raster (r_fut, thr)
dist_km <- if (is.finite (cur_ca$ lon) && is.finite (fut_ca$ lon)) {
haversine_km (cur_ca$ lon, cur_ca$ lat, fut_ca$ lon, fut_ca$ lat)
} else NA_real_
shift_rows[[length (shift_rows)+ 1 ]] <- tibble:: tibble (
species = sp, group = grp, model_type = mt, scenario = sc,
thr = thr,
area_current_km2 = cur_ca$ area_km2,
area_future_km2 = fut_ca$ area_km2,
lon_current = cur_ca$ lon, lat_current = cur_ca$ lat,
lon_future = fut_ca$ lon, lat_future = fut_ca$ lat,
shift_km = dist_km
)
}
}
}
shift_tbl <- if (length (shift_rows) > 0 ) bind_rows (shift_rows) else NULL
if (! is.null (shift_tbl)) write_csv (shift_tbl, file.path (tab_dir, "centroid_shifts.csv" ))
}
shift_tbl |> head (20 )
```
```{r}
if (! is.null (shift_tbl)) {
constraint <- shift_tbl |>
dplyr:: select (species, group, scenario, model_type, shift_km) |>
pivot_wider (names_from = model_type, values_from = shift_km) |>
mutate (constraint_km = EnvOnly - Combined)
write_csv (constraint, file.path (tab_dir, "biotic_constraint_km.csv" ))
p <- ggplot (constraint, aes (x = group, y = constraint_km)) +
geom_boxplot (outlier.alpha = 0.2 ) +
facet_wrap (~ scenario) +
labs (x = NULL , y = "EnvOnly shift − Combined shift (km)" )
print (p)
save_plot (p, file.path (fig_dir, "05_range_shift" , "biotic_constraint_by_group.png" ), w = 10 , h = 6 )
}
```
# POST-ANALYSIS: HYPOTHESIS TESTING & MECHANISMS
## 1. Model Performance Comparison (LMM)
This block loads the stats, merges with groups, runs the LMM, and plots the results faceted/colored by group.
```{r}
# ==============================================================================
# 1. MODEL PERFORMANCE & VARIABLE IMPORTANCE (FINAL COMPLETE PIPELINE)
# ==============================================================================
# --- A. Load & Clean Data ---
library (lme4)
library (lmerTest)
# Load all stats files found
stats_files <- list.files (file.path (run_dir, "models_stats" ), full.names= TRUE , pattern= ".csv$" )
raw_stats <- purrr:: map_dfr (stats_files, read_csv, show_col_types= FALSE )
# FILTER: Keep only species that successfully finished ALL 3 model types
valid_species_stats <- raw_stats %>%
filter (model %in% c ("EnvOnly" , "HostOnly" , "Combined" )) %>%
group_by (species) %>%
filter (n_distinct (model) == 3 ) %>% # MUST have all 3 models
ungroup () %>%
mutate (model = factor (model, levels= c ("EnvOnly" , "HostOnly" , "Combined" )),
species = as.factor (species))
# Add Groups
if (exists ("fish_groups" )) {
valid_species_stats <- left_join (valid_species_stats, fish_groups, by= "species" ) %>%
filter (group %in% c ("generalist" , "specialist" ))
}
# Print who survived
survivors <- unique (valid_species_stats$ species)
cat (paste ("Species included in final analysis:" , length (survivors), " \n " ))
# --- SAVE RAW EVALUATION ARTEFACTS (Fold-wise Summaries) ---
# This serves as the raw data for your Appendix
raw_out_path <- file.path (tab_dir, "raw_fold_summaries.csv" )
write_csv (valid_species_stats, raw_out_path)
cat (sprintf ("Saved raw fold-wise summaries to: %s \n " , raw_out_path))
# --- B. LMM Statistical Tests ---
# This generates the p-values for your Results text
run_lmm <- function (data, metric, title) {
if (nrow (data) < 10 ) {
cat (paste0 (" \n >>> SKIP LMM: " , title, " (Too little data) <<< \n " ))
return (NULL )
}
f <- as.formula (paste (metric, "~ model + (1|species)" ))
cat (paste0 (" \n >>> LMM RESULTS: " , title, " <<< \n " ))
mod <- lmer (f, data= data)
print (summary (mod))
}
run_lmm (valid_species_stats, "auc" , "AUC - Overall" )
run_lmm (valid_species_stats, "tss" , "TSS - Overall" )
cat (" \n --- SUBSET: GENERALISTS --- \n " )
run_lmm (valid_species_stats %>% filter (group == "generalist" ), "auc" , "AUC - Generalists" )
run_lmm (valid_species_stats %>% filter (group == "generalist" ), "tss" , "TSS - Generalists" )
cat (" \n --- SUBSET: SPECIALISTS --- \n " )
run_lmm (valid_species_stats %>% filter (group == "specialist" ), "auc" , "AUC - Specialists" )
run_lmm (valid_species_stats %>% filter (group == "specialist" ), "tss" , "TSS - Specialists" )
# --- C. Per-Species Comparison Plot ---
plot_species_perf <- function (data, metric, title_text) {
ggplot (data, aes (x= species, y= .data[[metric]], fill= model)) +
geom_boxplot (outlier.size= 0.5 , alpha= 0.9 , position= position_dodge (0.8 )) +
scale_fill_manual (values= c ("EnvOnly" = "#E69F00" , "HostOnly" = "#56B4E9" , "Combined" = "#009E73" )) +
labs (y= toupper (metric), x= NULL , fill= "Model Type" ) +
theme_minimal () +
theme (
axis.text.x = element_text (angle= 45 , hjust= 1 , face= "italic" , size= 10 ),
legend.position = "top" ,
panel.grid.major.x = element_blank ()
)
}
p_auc_sp <- plot_species_perf (valid_species_stats, "auc" , "AUC by Species and Model Type" )
ggsave (file.path (fig_dir, "anemonefish_model_auc_comparison_per_species.png" ), p_auc_sp, width= 12 , height= 6 , bg= "white" )
print (p_auc_sp)
p_tss_sp <- plot_species_perf (valid_species_stats, "tss" , "TSS by Species and Model Type" )
ggsave (file.path (fig_dir, "anemonefish_model_tss_comparison_per_species.png" ), p_tss_sp, width= 12 , height= 6 , bg= "white" )
print (p_tss_sp)
# --- D. Summary Tables ---
thesis_table <- valid_species_stats %>%
filter (model == "Combined" ) %>%
group_by (species) %>%
summarise (
Mean_AUC = round (mean (auc), 3 ),
SD_AUC = round (sd (auc), 3 ),
Mean_TSS = round (mean (tss), 3 ),
SD_TSS = round (sd (tss), 3 )
) %>%
arrange (species)
write_csv (thesis_table, file.path (tab_dir, "thesis_combined_model_performance.csv" ))
fish_summary <- valid_species_stats %>%
group_by (species, model, group) %>%
summarise (
n_iter = n (),
auc_mean = mean (auc, na.rm = TRUE ),
auc_sd = sd (auc, na.rm = TRUE ),
tss_mean = mean (tss, na.rm = TRUE ),
boyce_mean = mean (boyce, na.rm = TRUE ),
.groups = "drop"
)
write_csv (fish_summary, file.path (tab_dir, "fish_performance_summary.csv" ))
# ==============================================================================
# E. VARIABLE IMPORTANCE (FISH & HOSTS)
# ==============================================================================
extract_maxnet_coefs <- function (rds_path) {
tryCatch ({
mod <- readRDS (rds_path)
if (! inherits (mod, "maxnet" )) return (NULL )
betas <- mod$ betas
if (length (betas) == 0 ) return (NULL )
beta_names <- names (betas)
safe_names <- attr (mod, "safe_names" )
real_names <- attr (mod, "orig_names" )
name_map <- setNames (real_names, safe_names)
clean_vars <- character (length (betas))
for (i in seq_along (betas)) {
b_name <- beta_names[i]
v_code <- str_extract (b_name, "v[0-9]+" )
if (! is.na (v_code) && v_code %in% names (name_map)) {
clean_vars[i] <- name_map[[v_code]]
} else {
clean_vars[i] <- "Unknown"
}
}
tibble:: tibble (
variable = clean_vars,
coefficient = as.numeric (betas),
file = basename (rds_path)
)
}, error = function (e) return (NULL ))
}
# --- FORCE UPDATE: Always run this block ---
coefs_cache_path <- file.path (tab_dir, "raw_model_coefficients.csv" )
rds_files <- list.files (file.path (run_dir, "models" ), pattern = " \\ .rds$" , full.names = TRUE )
if (length (rds_files) > 0 ) {
cat ("Extracting coefficients from" , length (rds_files), "models... (This takes time) \n " )
coef_list <- purrr:: map (rds_files, extract_maxnet_coefs)
coef_df <- dplyr:: bind_rows (coef_list)
# Parse metadata (Robust for Fish AND Hosts)
coef_df <- coef_df %>%
mutate (
species = str_extract (file, "^[A-Za-z]+_[a-z]+" ),
model = case_when (
str_detect (file, "EnvOnly" ) ~ "EnvOnly" ,
str_detect (file, "HostOnly" ) ~ "HostOnly" ,
str_detect (file, "Combined" ) ~ "Combined" ,
str_detect (file, "_Host_" ) ~ "Host" , # Capture Host models correctly
TRUE ~ "Other"
)
)
write_csv (coef_df, coefs_cache_path)
cat ("Saved fresh coefficients to:" , coefs_cache_path, " \n " )
}
# --- PLOTTING ---
model_coefs <- safe_read_csv (coefs_cache_path)
if (! is.null (model_coefs)) {
# 1. FISH VARIABLE IMPORTANCE
fish_coefs <- model_coefs %>%
filter (model %in% c ("EnvOnly" , "Combined" )) %>%
left_join (fish_groups, by = "species" ) %>%
filter (! is.na (group)) %>%
group_by (species, model, variable, group) %>%
summarise (mean_coef = mean (abs (coefficient)), .groups = "drop" )
p_fish <- ggplot (fish_coefs, aes (x = variable, y = mean_coef, fill = model)) +
geom_boxplot (outlier.alpha = 0.2 ) +
facet_wrap (~ group, scales = "free_x" ) +
coord_flip () +
scale_fill_manual (values = c ("EnvOnly" = "#E69F00" , "Combined" = "#009E73" )) +
labs (y = "Mean Absolute Coefficient" , x = NULL ) +
theme_minimal ()
save_plot (p_fish, file.path (fig_dir, "01_performance" , "variable_importance_coefs.png" ))
print (p_fish)
# 2. HOST VARIABLE IMPORTANCE (NEW)
host_coefs <- model_coefs %>%
filter (model == "Host" ) %>%
group_by (species, variable) %>%
summarise (mean_coef = mean (abs (coefficient)), .groups = "drop" )
if (nrow (host_coefs) > 0 ) {
p_host <- ggplot (host_coefs, aes (x = reorder (variable, mean_coef), y = mean_coef)) +
geom_boxplot (fill = "gray70" , outlier.alpha = 0.2 ) +
coord_flip () +
labs (y = "Mean Absolute Coefficient" , x = NULL ) +
theme_minimal ()
save_plot (p_host, file.path (fig_dir, "01_performance" , "host_variable_importance.png" ))
print (p_host)
}
}
# ==============================================================================
# F. SPATIAL RELIABILITY (ERROR PROPAGATION)
# ==============================================================================
scrape_fold_logs <- function (log_dir) {
log_files <- list.files (log_dir, pattern = " \\ .log$" , full.names = TRUE , recursive = TRUE )
if (length (log_files) == 0 ) return (NULL )
extract_lines <- function (f) {
lines <- tryCatch (readLines (f, warn = FALSE ), error= function (e) NULL )
if (is.null (lines)) return (NULL )
# Regex to capture: Iter number, Fold number, AUC, TSS
pattern <- "Iter \\ s+( \\ d+).*fold: \\ s+( \\ d+).*AUC: \\ s+([0-9.]+).*TSS: \\ s+([0-9.]+)"
matches <- str_match (lines, pattern)
valid <- ! is.na (matches[,1 ])
if (sum (valid) == 0 ) return (NULL )
fname <- basename (f)
species <- str_extract (fname, "^[A-Za-z]+_[a-z]+" )
# Robust Model Detection
model <- "Other"
if (str_detect (fname, "EnvOnly" )) model <- "EnvOnly"
if (str_detect (fname, "HostOnly" )) model <- "HostOnly"
if (str_detect (fname, "Combined" )) model <- "Combined"
if (str_detect (fname, "Host \\ .log" )) model <- "HostEnsemble"
tibble:: tibble (
species = species, model_type = model,
iter = as.numeric (matches[valid, 2 ]),
fold = as.numeric (matches[valid, 3 ]),
auc = as.numeric (matches[valid, 4 ])
)
}
purrr:: map_dfr (log_files, extract_lines)
}
# Run Scraper
log_data <- scrape_fold_logs (file.path (run_dir, "logs" ))
if (! is.null (log_data) && nrow (log_data) > 0 ) {
write_csv (log_data, file.path (tab_dir, "raw_spatial_fold_performance.csv" ))
# --- PLOT 3: The Error Propagation Plot (NEW) ---
# Filter for Fish (Combined) and Hosts
fold_plot_data <- log_data %>%
filter (model_type %in% c ("Combined" , "HostEnsemble" )) %>%
mutate (Category = ifelse (model_type == "Combined" , "Anemonefish (Realized)" , "Host Anemones" ))
p_spatial <- ggplot (fold_plot_data, aes (x = factor (fold), y = auc, fill = Category)) +
geom_boxplot (alpha = 0.7 , outlier.alpha = 0.2 ) +
scale_fill_manual (values = c ("Anemonefish (Realized)" = "#009E73" , "Host Anemones" = "gray50" )) +
labs (x = "Spatial Fold ID (Block)" , y = "AUC" , fill = NULL ) +
theme_minimal () +
theme (legend.position = "bottom" )
save_plot (p_spatial, file.path (fig_dir, "01_performance" , "spatial_error_propagation.png" ))
print (p_spatial)
# Print Summary Table
cat (" \n --- SPATIAL FOLD SUMMARY --- \n " )
print (fold_plot_data %>%
group_by (Category, fold) %>%
summarise (Mean_AUC = mean (auc), .groups= "drop" ))
}
```
## 2. Biotic Constraint (Range Shift)
Note: Amphiprion bicinctus is excluded from range shift calculations due to geometric artifacts in the Red Sea.
```{r}
# --- 2. BIOTIC CONSTRAINT (Hypothesis 2) - Excluding Outliers ---
cat (" \n --- HYPOTHESIS 2: BIOTIC CONSTRAINT --- \n " )
# Helper for Centroids
get_weighted_centroid <- function (r) {
df <- terra:: as.data.frame (r, xy= TRUE , na.rm= TRUE )
colnames (df) <- c ("x" , "y" , "val" )
tm <- sum (df$ val)
if (tm == 0 ) return (c (x= NA , y= NA ))
return (c (x= sum (df$ x* df$ val)/ tm, y= sum (df$ y* df$ val)/ tm))
}
all_shift_results <- data.frame ()
if (length (future_scenarios) > 0 ) {
for (scen in future_scenarios) {
# message(paste("Calculating shifts for:", scen))
for (sp in fish_species) {
f_curr_env <- find_fish_raster (sp, "EnvOnly" , "current" , stat= "mean" )
f_fut_env <- find_fish_raster (sp, "EnvOnly" , "future" , scenario= scen, stat= "mean" )
f_curr_comb <- find_fish_raster (sp, "Combined" , "current" , stat= "mean" )
f_fut_comb <- find_fish_raster (sp, "Combined" , "future" , scenario= scen, stat= "mean" )
if (any (sapply (list (f_curr_env, f_fut_env, f_curr_comb, f_fut_comb), is.null))) next
tryCatch ({
r_curr_env <- terra:: rast (f_curr_env)
r_fut_env <- terra:: rast (f_fut_env)
r_curr_comb <- terra:: rast (f_curr_comb)
r_fut_comb <- terra:: rast (f_fut_comb)
c_curr_env <- get_weighted_centroid (r_curr_env)
c_fut_env <- get_weighted_centroid (r_fut_env)
dist_pot <- haversine_km (c_curr_env[1 ], c_curr_env[2 ], c_fut_env[1 ], c_fut_env[2 ])
c_curr_comb <- get_weighted_centroid (r_curr_comb)
c_fut_comb <- get_weighted_centroid (r_fut_comb)
dist_real <- haversine_km (c_curr_comb[1 ], c_curr_comb[2 ], c_fut_comb[1 ], c_fut_comb[2 ])
all_shift_results <- rbind (all_shift_results, data.frame (
species = sp,
scenario = scen,
shift_potential_km = dist_pot,
shift_realized_km = dist_real,
lag_km = dist_pot - dist_real
))
}, error= function (e) NULL )
}
}
}
# Add Groups
if (exists ("fish_groups" ) && nrow (all_shift_results) > 0 ) {
all_shift_results <- left_join (all_shift_results, fish_groups, by= "species" ) %>%
filter (group != "unknown" )
}
# Save Table & Plot
if (nrow (all_shift_results) > 0 ) {
# --- UPDATED: Save ALL scenarios to CSV ---
final_table <- all_shift_results %>%
mutate (
Potential_Shift_km = round (shift_potential_km, 1 ),
Realized_Shift_km = round (shift_realized_km, 1 ),
Lag_km = round (lag_km, 1 ),
Pct_Loss = round ((lag_km / shift_potential_km) * 100 , 1 )
) %>%
dplyr:: select (species, group, scenario, Potential_Shift_km, Realized_Shift_km, Lag_km, Pct_Loss) %>%
arrange (scenario, desc (Lag_km))
write_csv (final_table, file.path (tab_dir, "table_biotic_constraint_shifts_clean.csv" ))
print (head (final_table))
# --- FIX LABELS START ---
all_shift_results <- all_shift_results |>
mutate (scenario = str_replace_all (scenario, "ssp( \\ d)( \\ d)( \\ d)_( \\ d+)" , "SSP \\ 1- \\ 2. \\ 3 \\ 4" ))
# --- FIX LABELS END ---
# Plot Biotic Brake (Faceted by Scenario)
p_brake <- ggplot (all_shift_results, aes (x= shift_potential_km, y= shift_realized_km, color= group)) +
geom_abline (slope= 1 , intercept= 0 , linetype= "dashed" , color= "gray50" ) +
geom_point (size= 2.5 , alpha= 0.7 ) +
scale_color_manual (values= c ("generalist" = "#E69F00" , "specialist" = "#56B4E9" )) +
facet_wrap (~ scenario) +
labs (x= "Potential Shift (Climate Only) [km]" ,
y= "Realized Shift (With Host) [km]" ) +
theme_minimal () +
theme (legend.position= "bottom" , strip.text= element_text (face= "bold" ))
ggsave (file.path (fig_dir, "Figure9a_Biotic_Brake_Scatter_Clean.png" ), p_brake, w= 10 , h= 6 )
print (p_brake)
}
```
## NICHE BREADTH vs RANGE SHIFT LMM (New Analysis)
```{r}
# ==============================================================================
# 2B. NICHE BREADTH vs RANGE SHIFT LMM (New Analysis)
# ==============================================================================
cat (" \n --- HYPOTHESIS 2B: NICHE BREADTH LMM --- \n " )
# 1. Prepare Data
# We need 'shift_tbl' (from block 2) merged with 'b2_df' (niche breadth)
# Re-calculate B2 if not already in memory, or ensure b2_df exists from previous blocks.
if (exists ("shift_tbl" ) && exists ("fish_species" )) {
# Calculate B2 if missing (Fast re-run)
if (! exists ("b2_df" )) {
b2_list <- list ()
for (sp in fish_species) {
f_curr <- find_fish_raster (sp, "Combined" , "current" )
if (! is.null (f_curr)) {
r <- safe_read_rast (f_curr)
val <- values (r, mat= FALSE , na.rm= TRUE )
val <- val[val > 0 ]
b2 <- if (length (val)> 0 ) 1 / (length (val)* sum ((val/ sum (val))^ 2 )) else NA
b2_list[[length (b2_list)+ 1 ]] <- tibble (species= sp, niche_breadth_B2= b2)
}
}
b2_df <- bind_rows (b2_list)
}
# Merge Data for LMM
lmm_data <- shift_tbl %>%
filter (model_type == "Combined" ) %>% # Only care about Realized Shift
inner_join (b2_df, by= "species" ) %>%
filter (! is.na (shift_km) & ! is.na (niche_breadth_B2))
# 2. Run Linear Mixed Model
# Fixed Effect: Niche Breadth
# Random Effect: Species (intercept) nested in Scenario?
# Actually, just (1|species) + (1|scenario) handles the crossed structure best
library (lme4)
library (lmerTest)
cat (" \n > Running LMM: Range Shift ~ Niche Breadth + (1|Scenario) \n " )
# Note: Species is the level of observation for Niche Breadth, so we can't control for species
# as a random effect against its own trait easily unless we had multiple traits per species.
# BUT, since we have multiple observations per species (multiple scenarios), we CAN control for species!
mod_breadth <- lmer (shift_km ~ niche_breadth_B2 + (1 | species) + (1 | scenario), data= lmm_data)
print (summary (mod_breadth))
# 3. Save Stats for Thesis Text
# Extract P-value and Beta
coefs <- summary (mod_breadth)$ coefficients
beta <- coefs["niche_breadth_B2" , "Estimate" ]
pval <- coefs["niche_breadth_B2" , "Pr(>|t|)" ]
cat (sprintf (" \n RESULTS FOR TEXT: \n Beta: %.4f \n P-value: %.5f \n " , beta, pval))
}
```
## 3. Specialist vs. Generalist Test
```{r}
# --- 3. SPECIALIST VS GENERALIST (Hypothesis 3) ---
cat (" \n --- HYPOTHESIS 3: SPECIALIST VS GENERALIST --- \n " )
if (exists ("all_shift_results" ) && nrow (all_shift_results) > 0 ) {
# --- FIX LABELS START ---
# Safe to run even if already fixed (regex won't match if already "SSP...")
all_shift_results <- all_shift_results |>
mutate (scenario = str_replace_all (scenario, "ssp( \\ d)( \\ d)( \\ d)_( \\ d+)" , "SSP \\ 1- \\ 2. \\ 3 \\ 4" ))
# --- FIX LABELS END ---
# Plot Boxplot (Faceted)
p_spec <- ggplot (all_shift_results, aes (x= group, y= lag_km, fill= group)) +
geom_boxplot (alpha= 0.8 , outlier.shape= NA ) +
geom_jitter (width= 0.2 , size= 1 , alpha= 0.4 ) +
facet_wrap (~ scenario) +
scale_fill_manual (values= c ("generalist" = "#E69F00" , "specialist" = "#56B4E9" )) +
labs (y= "Range Lag (km)" , x= NULL ) +
theme_minimal () +
theme (legend.position= "bottom" , strip.text= element_text (face= "bold" ))
ggsave (file.path (fig_dir, "Figure11_Brake_vs_Specialization_Clean.png" ), p_spec, w= 10 , h= 6 )
print (p_spec)
# --- UPDATED: Loop T-Tests for ALL Scenarios ---
scenarios <- unique (all_shift_results$ scenario)
ttest_summary <- list ()
for (sc in scenarios) {
sub_data <- all_shift_results %>% filter (scenario == sc)
# Check if we have enough data points for both groups
n_gen <- sum (sub_data$ group == "generalist" )
n_spec <- sum (sub_data$ group == "specialist" )
if (n_gen > 2 && n_spec > 2 ) {
test <- t.test (lag_km ~ group, data= sub_data)
ttest_summary[[length (ttest_summary)+ 1 ]] <- tibble:: tibble (
scenario = sc,
t_value = round (test$ statistic, 3 ),
df = round (test$ parameter, 2 ),
p_value = test$ p.value,
significance = ifelse (test$ p.value < 0.05 , "*" , "ns" ),
mean_generalist = round (test$ estimate[1 ], 1 ),
mean_specialist = round (test$ estimate[2 ], 1 )
)
cat (paste0 (" \n > Scenario: " , sc, " \n " ))
print (test)
}
}
# Print Summary Table of Stats
if (length (ttest_summary) > 0 ) {
summary_df <- dplyr:: bind_rows (ttest_summary)
print (summary_df)
write_csv (summary_df, file.path (tab_dir, "specialist_vs_generalist_stats.csv" ))
}
}
```
## Maps & Deltas
```{r}
# ==============================================================================
# 5. MAPPING: CURRENT SDMs & FUTURE DELTAS (NAMESPACED & ROBUST)
# ==============================================================================
# Ensure libraries are loaded (explicitly checking)
if (! requireNamespace ("tidyterra" , quietly= TRUE )) install.packages ("tidyterra" )
if (! requireNamespace ("rnaturalearth" , quietly= TRUE )) install.packages ("rnaturalearth" )
# Note: Libraries are loaded, but we use explicit namespaces below for safety
# Load World Land Data (for background)
# Check if it exists globally first to save time
if (! exists ("world_land" )) {
if (requireNamespace ("rnaturalearth" , quietly= TRUE )) {
world_land <- rnaturalearth:: ne_countries (scale = "medium" , returnclass = "sf" )
}
}
# Helper for plotting beautiful rasters (Namespaced)
plot_suitability <- function (r, title_text) {
ggplot2:: ggplot () +
# Add Land (Gray)
ggplot2:: geom_sf (data = world_land, fill = "gray80" , color = "white" , size = 0.1 ) +
# Add Raster (Marine)
tidyterra:: geom_spatraster (data = r) +
tidyterra:: scale_fill_whitebox_c (
palette = "deep" ,
direction = 1 ,
na.value = "transparent" ,
labels = scales:: percent
) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) + # Force Indo-Pacific crop
ggplot2:: labs (fill = "Suitability" , title = title_text) +
ggplot2:: theme_minimal () +
ggplot2:: theme (
axis.text = ggplot2:: element_blank (),
axis.ticks = ggplot2:: element_blank (),
panel.grid = ggplot2:: element_blank (),
plot.title = ggplot2:: element_text (face = "bold" , hjust = 0.5 )
)
}
# Helper for plotting Deltas (Namespaced)
plot_delta <- function (r, title_text) {
ggplot2:: ggplot () +
# Add Land (Gray)
ggplot2:: geom_sf (data = world_land, fill = "gray80" , color = "white" , size = 0.1 ) +
# Add Delta Raster
tidyterra:: geom_spatraster (data = r) +
ggplot2:: scale_fill_gradient2 (
low = "#D73027" , mid = "#F7F7F7" , high = "#4575B4" ,
midpoint = 0 ,
na.value = "transparent" ,
limits = c (- 1 , 1 ),
name = "Change"
) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) + # Force Indo-Pacific crop
ggplot2:: labs (title = title_text) +
ggplot2:: theme_minimal () +
ggplot2:: theme (
axis.text = ggplot2:: element_blank (),
axis.ticks = ggplot2:: element_blank (),
panel.grid = ggplot2:: element_blank (),
plot.title = ggplot2:: element_text (face = "bold" , hjust = 0.5 )
)
}
# --- Loop over Species for Maps ---
cat (" \n --- Generating Species Maps --- \n " )
# ------------------------------------------------------------------------------
# PART A: HOST ANEMONE MAPS
# ------------------------------------------------------------------------------
# We need to discover the list of hosts first since 'fish_species' only has fish.
host_list <- list.files (file.path (pred_cur, "hosts" ), pattern= "_mean.tif$" )
host_species <- unique (sub ("_mean.tif" , "" , host_list))
cat (" \n --- Generating Host Maps --- \n " )
for (h in host_species) {
# 1. Current Map
f_curr <- find_host_raster (h, "current" , stat= "mean" )
if (! is.null (f_curr)) {
r_curr <- safe_read_rast (f_curr)
if (! is.null (r_curr)) {
p <- plot_suitability (r_curr, paste0 (gsub ("_" , " " , h), " - Current" ))
out_path <- file.path (fig_dir, "02_maps" , "hosts" , "current" , paste0 (h, "_current.png" ))
dir.create (dirname (out_path), recursive = TRUE , showWarnings = FALSE )
ggplot2:: ggsave (out_path, p, width = 8 , height = 5 , bg = "white" )
print (p)
}
}
# 2. Future Delta Maps
for (sc in future_scenarios) {
f_curr <- find_host_raster (h, "current" , stat= "mean" )
f_fut <- find_host_raster (h, "future" , scenario = sc, stat= "mean" )
if (is.null (f_curr) || is.null (f_fut)) next
r_curr <- safe_read_rast (f_curr)
r_fut <- safe_read_rast (f_fut)
if (! terra:: compareGeom (r_curr, r_fut, stopOnError = FALSE )) {
r_fut <- terra:: resample (r_fut, r_curr, method = "bilinear" )
r_fut <- terra:: mask (r_fut, r_curr)
}
r_delta <- r_fut - r_curr
names (r_delta) <- "delta"
p_delta <- plot_delta (r_delta, paste0 (gsub ("_" , " " , h), " - Change (" , sc, ")" ))
out_path <- file.path (fig_dir, "02_maps" , "hosts" , "future_delta" , sc, paste0 (h, "_delta.png" ))
dir.create (dirname (out_path), recursive = TRUE , showWarnings = FALSE )
ggplot2:: ggsave (out_path, p_delta, width = 8 , height = 5 , bg = "white" )
print (p_delta)
}
}
# ------------------------------------------------------------------------------
# PART B: FISH MAPS (Combined Model Only)
# ------------------------------------------------------------------------------
cat (" \n --- Generating Fish Maps --- \n " )
for (sp in fish_species) {
grp <- fish_group_of (sp)
# 1. Current Maps (EnvOnly & Combined)
for (mt in c ("Combined" )) {
f_curr <- find_fish_raster (sp, mt, "current" )
if (is.null (f_curr)) next
r_curr <- safe_read_rast (f_curr)
if (is.null (r_curr)) next
# Save Plot
p <- plot_suitability (r_curr, paste0 (gsub ("_" , " " , sp), " (" , mt, ") - Current" ))
out_path <- file.path (fig_dir, "02_maps" , "current" , mt, grp, sp, paste0 (sp, "_current.png" ))
dir.create (dirname (out_path), recursive = TRUE , showWarnings = FALSE )
ggplot2:: ggsave (out_path, p, width = 8 , height = 5 , bg = "white" )
print (p)
}
# 2. Future Delta Maps (All Scenarios)
for (sc in future_scenarios) {
for (mt in c ("Combined" )) {
f_curr <- find_fish_raster (sp, mt, "current" )
f_fut <- find_fish_raster (sp, mt, "future" , scenario = sc)
if (is.null (f_curr) || is.null (f_fut)) next
r_curr <- safe_read_rast (f_curr)
r_fut <- safe_read_rast (f_fut)
if (is.null (r_curr) || is.null (r_fut)) next
# Align Extents if needed (Robust subtraction)
if (! terra:: compareGeom (r_curr, r_fut, stopOnError = FALSE )) {
r_fut <- terra:: resample (r_fut, r_curr, method = "bilinear" )
r_fut <- terra:: mask (r_fut, r_curr)
}
# Calculate Delta
r_delta <- r_fut - r_curr
names (r_delta) <- "delta"
# Save Plot
p_delta <- plot_delta (r_delta, paste0 (gsub ("_" , " " , sp), " - Change (" , sc, ")" ))
out_path_delta <- file.path (fig_dir, "02_maps" , "future_delta" , sc, mt, grp, sp, paste0 (sp, "_delta.png" ))
dir.create (dirname (out_path_delta), recursive = TRUE , showWarnings = FALSE )
ggplot2:: ggsave (out_path_delta, p_delta, width = 8 , height = 5 , bg = "white" )
print (p_delta)
}
}
}
```
## NICHE BREADTH & CLIMATE SENSITIVITY (Post-Hoc Analysis)
```{r}
# ==============================================================================
# 6. NICHE BREADTH & RANGE SHIFT (Using shift_tbl)
# ==============================================================================
# # 1. Set the path to your new results file
# # Adjust "outputs/final_run" if your project structure is different
# input_csv <- "outputs/final_run/analysis_tables/centroid_shifts.csv"
#
# # 2. Read the data into 'shift_tbl'
# if (file.exists(input_csv)) {
# shift_tbl <- read_csv(input_csv, show_col_types = FALSE)
#
# # Ensure the column types are correct for the plotting code
# shift_tbl <- shift_tbl %>%
# mutate(
# species = as.character(species),
# group = as.character(group),
# model_type = as.character(model_type),
# scenario = as.character(scenario),
# shift_km = as.numeric(shift_km)
# )
#
# cat("✅ Successfully loaded 'shift_tbl' from CSV.\n")
# print(head(shift_tbl))
#
# } else {
# stop("❌ Could not find file: ", input_csv, "\nCheck your directory path!")
# }
# Helper: Simple Levin's B2 calculation
calc_levins_B2 <- function (r) {
if (is.null (r)) return (NA )
v <- terra:: values (r, mat= FALSE , na.rm= TRUE )
v <- v[v > 0 ]
if (length (v) == 0 ) return (NA )
p <- v / sum (v)
return (1 / (length (p) * sum (p^ 2 )))
}
# 1. Calculate Current Niche Breadth (B2) for all species
# We only need to load CURRENT rasters here (Fast)
b2_results <- list ()
cat (" \n --- Calculating Niche Breadth (B2) --- \n " )
# Combine fish and hosts (Hosts need B2 too)
# Note: shift_tbl only has fish, so we will plot fish primarily, or just fish.
# Your Results text focuses on "Species with broader tolerances...", implying fish.
for (sp in fish_species) {
# Load Combined model for fish (Realized Niche)
f_curr <- find_fish_raster (sp, "Combined" , "current" )
if (is.null (f_curr)) next
r_curr <- safe_read_rast (f_curr)
b2 <- calc_levins_B2 (r_curr)
b2_results[[length (b2_results)+ 1 ]] <- tibble:: tibble (species = sp, niche_breadth_B2 = b2)
}
b2_df <- dplyr:: bind_rows (b2_results)
# 2. Merge with your EXISTING shift_tbl
# shift_tbl has: species, scenario, model_type, shift_km
if (exists ("shift_tbl" ) && ! is.null (shift_tbl) && nrow (b2_df) > 0 ) {
# Filter for Realized Shift (Combined Model)
shifts_for_plot <- shift_tbl %>%
dplyr:: filter (model_type == "Combined" ) %>%
dplyr:: inner_join (b2_df, by = "species" ) %>%
mutate (scenario = str_replace_all (scenario, "ssp( \\ d)( \\ d)( \\ d)_( \\ d+)" , "SSP \\ 1- \\ 2. \\ 3 \\ 4" ))
# 3. Plot: Niche Breadth vs Range Shift (km)
p_nb <- ggplot2:: ggplot (shifts_for_plot, ggplot2:: aes (x = niche_breadth_B2, y = shift_km, color = group)) +
ggplot2:: geom_point (size = 3 , alpha = 0.7 ) +
ggplot2:: geom_smooth (method = "lm" , se = TRUE , alpha = 0.2 ) +
ggplot2:: facet_wrap (~ scenario) +
ggplot2:: scale_color_manual (values = c ("generalist" = "#E69F00" , "specialist" = "#56B4E9" )) +
ggplot2:: labs (
x = "Realized Niche Breadth (Levin's B2)" ,
y = "Projected Range Shift (km)"
) +
ggplot2:: theme_minimal ()
# Save
out_path <- file.path (fig_dir, "niche_breadth_vs_suitability_shift_all_scenarios.png" ) # Keeping filename same for LaTeX compatibility
ggplot2:: ggsave (out_path, p_nb, width = 10 , height = 8 , bg = "white" )
print (p_nb)
# Save CSV for reference
readr:: write_csv (shifts_for_plot, file.path (tab_dir, "niche_breadth_analysis.csv" ))
}
```
## Community Richness and Conservation Prioritization
This section synthesizes species-level predictions into community-level metrics to inform conservation planning. We calculate aggregate species richness for both guilds under all scenarios and develop a "Conservation Priority Index" (CPI) to identify stable climate refugia.
```{r}
#| label: conservation-analysis
#| fig-width: 12
#| fig-height: 10
#| warning: false
#| message: false
# ==============================================================================
# 6. SOCIAL INFORMATICS: CONSERVATION PRIORITIZATION & ZONING (ROBUST)
# ==============================================================================
# This section synthesizes species-level predictions into community-level metrics.
cat (" \n --- STARTING CONSERVATION PRIORITIZATION ANALYSIS --- \n " )
# ------------------------------------------------------------------------------
# A. HELPER FUNCTIONS
# ------------------------------------------------------------------------------
# Helper to sum rasters for a specific list of species
get_community_richness <- function (species_list, type= "fish" , model= "Combined" , scenario= "current" ) {
stack_list <- list ()
for (sp in species_list) {
f_path <- NULL
if (type == "fish" ) {
w <- if (scenario == "current" ) "current" else "future"
sc <- if (scenario == "current" ) NULL else scenario
f_path <- find_fish_raster (sp, model, w, scenario= sc, stat= "mean" )
} else {
w <- if (scenario == "current" ) "current" else "future"
sc <- if (scenario == "current" ) NULL else scenario
f_path <- find_host_raster (sp, w, scenario= sc, stat= "mean" )
}
if (! is.null (f_path)) {
r <- safe_read_rast (f_path)
if (! is.null (r)) stack_list[[sp]] <- r
}
}
if (length (stack_list) == 0 ) return (NULL )
# Stack
r_stack <- terra:: rast (stack_list)
# ROBUST SUM: Align all layers to the first one to prevent extent errors
if (terra:: nlyr (r_stack) > 1 ) {
# Use first layer as template for the stack
template <- r_stack[[1 ]]
r_stack <- terra:: resample (r_stack, template, method= "near" )
}
r_sum <- sum (r_stack, na.rm= TRUE )
names (r_sum) <- "richness"
return (r_sum)
}
# Plotting function
plot_richness_map <- function (r, title_text, pal_option= "D" ) {
ggplot2:: ggplot () +
ggplot2:: geom_sf (data = world_land, fill = "gray80" , color = "white" , size = 0.1 ) +
tidyterra:: geom_spatraster (data = r) +
ggplot2:: scale_fill_viridis_c (option = pal_option, na.value = "transparent" , name = "Richness" ) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) +
ggplot2:: theme_minimal () +
ggplot2:: theme (
axis.text = ggplot2:: element_blank (),
axis.ticks = ggplot2:: element_blank (),
panel.grid = ggplot2:: element_blank ()
)
}
# ------------------------------------------------------------------------------
# B. GENERATE RICHNESS MAPS
# ------------------------------------------------------------------------------
scenarios_to_run <- c ("current" , future_scenarios)
layers_cache <- list ()
# 1. HOSTS
host_files <- list.files (file.path (pred_cur, "hosts" ), pattern= "_mean.tif$" )
host_sp_list <- unique (sub ("_mean.tif" , "" , host_files))
for (sc in scenarios_to_run) {
cat (paste ("Generating Host Richness:" , sc, " \n " ))
r_host <- get_community_richness (host_sp_list, type= "host" , scenario= sc)
if (! is.null (r_host)) {
layers_cache[[paste0 ("host_" , sc)]] <- r_host
p <- plot_richness_map (r_host, paste0 ("Host Richness - " , sc), "mako" )
out_path <- file.path (fig_dir, "06_community" , "hosts" , paste0 ("richness_" , sc, ".png" ))
dir.create (dirname (out_path), recursive= TRUE , showWarnings= FALSE )
ggplot2:: ggsave (out_path, p, width= 8 , height= 5 , bg= "white" )
}
}
# 2. FISH (Combined)
for (sc in scenarios_to_run) {
cat (paste ("Generating Fish Richness:" , sc, " \n " ))
r_fish <- get_community_richness (fish_species, type= "fish" , model= "Combined" , scenario= sc)
if (! is.null (r_fish)) {
layers_cache[[paste0 ("fish_" , sc)]] <- r_fish
p <- plot_richness_map (r_fish, paste0 ("Anemonefish Richness (Realized) - " , sc), "turbo" )
out_path <- file.path (fig_dir, "06_community" , "fish" , paste0 ("richness_" , sc, ".png" ))
dir.create (dirname (out_path), recursive= TRUE , showWarnings= FALSE )
ggplot2:: ggsave (out_path, p, width= 8 , height= 5 , bg= "white" )
}
}
# ------------------------------------------------------------------------------
# C. CONSERVATION INDICES (FIXED GEOMETRY ALIGNMENT)
# ------------------------------------------------------------------------------
target_future_scen <- "ssp585_2100"
# Check if layers exist
r_curr_fish <- layers_cache$ fish_current
r_fut_host <- layers_cache[[paste0 ("host_" , target_future_scen)]]
if (! is.null (r_curr_fish) && ! is.null (r_fut_host)) {
cat (" \n --- Calculating Conservation Indices --- \n " )
# --- STEP 1: FORCE ALIGNMENT OF INPUTS ---
# We use r_curr_fish as the MASTER geometry template for everything.
# If any raster differs, we force it to match this master.
if (! terra:: compareGeom (r_curr_fish, r_fut_host, stopOnError= FALSE )) {
cat ("Aligning Future Host raster to Master (Current Fish) geometry... \n " )
r_fut_host <- terra:: resample (r_fut_host, r_curr_fish, method= "near" )
r_fut_host <- terra:: mask (r_fut_host, r_curr_fish)
}
norm_r <- function (r) {
minmax_v <- terra:: minmax (r)
if (minmax_v[2 ] == minmax_v[1 ]) return (r * 0 )
(r - minmax_v[1 ]) / (minmax_v[2 ] - minmax_v[1 ])
}
# 1. CPI Calculation
r_cpi <- norm_r (r_curr_fish) * norm_r (r_fut_host)
names (r_cpi) <- "CPI"
p_cpi <- ggplot2:: ggplot () +
ggplot2:: geom_sf (data = world_land, fill = "gray90" , color = NA ) +
tidyterra:: geom_spatraster (data = r_cpi) +
ggplot2:: scale_fill_gradientn (
colors = c ("#FFFFFF" , "#e5f5e0" , "#a1d99b" , "#41ab5d" , "#005a32" ),
name = "Priority \n Index" , na.value = "transparent"
) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) +
ggplot2:: theme_minimal () +
ggplot2:: theme (panel.grid = ggplot2:: element_blank ())
ggsave (file.path (fig_dir, "07_planning" , "Map_Conservation_Priority_Index.png" ), p_cpi, width= 8 , height= 5 , bg= "white" )
print (p_cpi)
# 2. ROI Calculation (Ghost Habitat)
# Needs Future EnvOnly Fish
r_fish_pot <- get_community_richness (fish_species, type= "fish" , model= "EnvOnly" , scenario= target_future_scen)
r_fish_real <- layers_cache[[paste0 ("fish_" , target_future_scen)]]
if (! is.null (r_fish_pot) && ! is.null (r_fish_real)) {
# --- STEP 2: FORCE ALIGNMENT OF ROI INPUTS ---
# Both must match the MASTER geometry (r_curr_fish) to ensure r_roi matches r_cpi later
if (! terra:: compareGeom (r_curr_fish, r_fish_pot, stopOnError= FALSE )) {
r_fish_pot <- terra:: resample (r_fish_pot, r_curr_fish, method= "near" )
r_fish_pot <- terra:: mask (r_fish_pot, r_curr_fish)
}
if (! terra:: compareGeom (r_curr_fish, r_fish_real, stopOnError= FALSE )) {
r_fish_real <- terra:: resample (r_fish_real, r_curr_fish, method= "near" )
r_fish_real <- terra:: mask (r_fish_real, r_curr_fish)
}
r_roi <- r_fish_pot - r_fish_real
r_roi <- terra:: clamp (r_roi, lower= 0 )
names (r_roi) <- "ROI"
p_roi <- ggplot2:: ggplot () +
ggplot2:: geom_sf (data = world_land, fill = "gray90" , color = NA ) +
tidyterra:: geom_spatraster (data = r_roi) +
ggplot2:: scale_fill_gradientn (
colors = c ("#FFFFFF" , "#fcbba1" , "#fb6a4a" , "#cb181d" , "#67000d" ),
name = "Restoration \n Value" , na.value = "transparent"
) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) +
ggplot2:: theme_minimal () +
ggplot2:: theme (panel.grid = ggplot2:: element_blank ())
ggsave (file.path (fig_dir, "07_planning" , "Map_Restoration_Opportunity_Index.png" ), p_roi, width= 8 , height= 5 , bg= "white" )
print (p_roi)
# 3. ZONING MAP
# Use quantile thresholds on non-zero values
get_thresh <- function (r, q= 0.7 ) {
v <- terra:: values (r, mat= FALSE )
v <- v[v > 0 & ! is.na (v)]
if (length (v) == 0 ) return (0 )
quantile (v, probs= q)
}
# --- STEP 3: FINAL ALIGNMENT CHECK BEFORE ZONING ---
# Ensure r_roi matches r_cpi exactly before boolean logic
if (! terra:: compareGeom (r_cpi, r_roi, stopOnError= FALSE )) {
cat ("Aligning ROI to CPI geometry for final zoning... \n " )
r_roi <- terra:: resample (r_roi, r_cpi, method= "near" )
r_roi <- terra:: mask (r_roi, r_cpi)
}
t_cpi <- get_thresh (r_cpi, 0.7 )
t_roi <- get_thresh (r_roi, 0.7 )
# Initialize with template to match extents
r_zone <- terra:: rast (r_cpi)
terra:: values (r_zone) <- 0
# Vectorized assignment
cpi_mask <- r_cpi > t_cpi
roi_mask <- r_roi > t_roi
# Logic: 1=Refugia, 2=Restore, 3=Both
# Using terra logic to avoid memory issues
r_zone <- terra:: ifel (cpi_mask, 1 , 0 )
r_zone <- terra:: ifel (roi_mask, 2 , r_zone)
r_zone <- terra:: ifel (cpi_mask & roi_mask, 3 , r_zone)
# Mask out zeros for cleaner plot
r_zone <- terra:: mask (r_zone, r_zone, maskvalue= 0 )
r_zone <- terra:: as.factor (r_zone)
# Colors
cols <- c ("1" = "forestgreen" , "2" = "firebrick" , "3" = "goldenrod" )
lbls <- c ("1" = "Refugia (Protect)" , "2" = "Ghost (Restore)" , "3" = "Dual Priority" )
p_zone <- ggplot2:: ggplot () +
ggplot2:: geom_sf (data = world_land, fill = "gray95" , color = NA ) +
tidyterra:: geom_spatraster (data = r_zone) +
ggplot2:: scale_fill_manual (
values = cols,
labels = lbls,
na.translate = FALSE ,
name = "Policy Zone"
) +
ggplot2:: coord_sf (xlim = c (30 , 180 ), ylim = c (- 40 , 40 ), expand = FALSE ) +
ggplot2:: theme_minimal () +
ggplot2:: theme (panel.grid = ggplot2:: element_blank ())
ggsave (file.path (fig_dir, "07_planning" , "Map_Conservation_Zoning.png" ), p_zone, width= 8 , height= 5 , bg= "white" )
print (p_zone)
}
}
```
## Occurences Used in Models
```{r}
# ==============================================================================
# GET FINAL OCCURRENCE COUNTS (PLAIN TEXT OUTPUT)
# ==============================================================================
# 1. Define Species Lists (17 Fish, 8 Hosts)
final_fish_list <- c (
"Amphiprion_akallopisos" , "Amphiprion_akindynos" , "Amphiprion_allardi" ,
"Amphiprion_chrysogaster" , "Amphiprion_chrysopterus" , "Amphiprion_ephippium" ,
"Amphiprion_frenatus" , "Amphiprion_fuscocaudatus" , "Amphiprion_latifasciatus" ,
"Amphiprion_leucokranos" , "Amphiprion_melanopus" , "Amphiprion_nigripes" ,
"Amphiprion_ocellaris" , "Amphiprion_percula" , "Amphiprion_polymnus" ,
"Amphiprion_rubrocinctus" , "Amphiprion_tricinctus"
)
final_host_list <- c (
"Cryptodendrum_adhaesivum" , "Entacmaea_quadricolor" , "Heteractis_aurora" ,
"Heteractis_crispa" , "Heteractis_magnifica" , "Heteractis_malu" ,
"Stichodactyla_gigantea" , "Stichodactyla_haddoni"
)
# 2. Helper Function to find counts
get_occ_count <- function (sp, type) {
# Construct path relative to your project root
f_path <- file.path ("outputs" , "final_run" , "occurrences" , type, paste0 (sp, "_occ_used.csv" ))
if (file.exists (f_path)) {
# Fast count of lines (minus header)
df <- read.csv (f_path)
return (nrow (df))
} else {
return (NA )
}
}
# 3. Print Results to Console
results <- data.frame (Species = character (), Type = character (), Count = integer (), stringsAsFactors = FALSE )
for (sp in final_fish_list) {
n <- get_occ_count (sp, "fish" )
results[nrow (results) + 1 , ] <- list (sp, "Anemonefish" , n)
}
for (sp in final_host_list) {
n <- get_occ_count (sp, "hosts" )
results[nrow (results) + 1 , ] <- list (sp, "Host Anemone" , n)
}
print (results)
```
## Fold Plots
```{r}
# # Quick Check: Where is Fold 3?
# library(ggplot2)
# library(ENMeval)
#
# # 1. Load one species occurrence file (any one will do to see the blocks)
# # Adjust path to where your occs are stored
# occ <- read.csv("outputs/final_run/occurrences/fish/Amphiprion_clarkii_occ_used.csv")
#
# # 2. Re-run the blocking function (block_quadrant)
# # This reproduces the exact folds used in your model
# folds <- get.block(occ, occ[,c("x","y")], method="block")
#
# # 3. Plot it
# occ$fold <- factor(folds$occ.grp)
#
# ggplot(occ, aes(x=x, y=y, color=fold)) +
# geom_point(size=3) +
# geom_sf(data = rnaturalearth::ne_countries(returnclass = "sf"), inherit.aes=F, fill="gray") +
# coord_sf(xlim=c(30, 180), ylim=c(-40, 40)) +
# labs(title="Where is Fold 3?", color="Fold ID") +
# theme_minimal()
```
# Output locations
All figures and tables created by this report are saved under:
- Figures: `outputs/{run_id}/{figures_subdir}/`
- Tables: `outputs/{run_id}/{tables_subdir}/`
```{r}
tibble:: tibble (figures = fig_dir, tables = tab_dir)
```
# Replication
To ensure reproducibility of these results, the following R environment and package versions were used:
```{r}
# ------------------------------------------------------------------------------
# SESSION INFORMATION
# ------------------------------------------------------------------------------
# Prints R version, OS, and package versions.
# 'sessioninfo' provides a cleaner output if available; otherwise uses base R.
if (requireNamespace ("sessioninfo" , quietly = TRUE )) {
sessioninfo:: session_info ()
} else {
sessionInfo ()
}
```